pacman::p_load(tidyverse,openxlsx,reshape2)

precinct.num <- function(df) {
  vec1<-c()
  vec2<-c()
  frac <- df%>%
    dplyr::select(is.numeric)
  for (z in 1:ncol(frac)){
    vec2<-append(as.numeric(apply(frac[,z]%>%
                                    as.data.frame(.)%>%
                                    na.omit(.),2,length)),vec2)
  }
  for (i in 1:(ncol(frac)-1)){
    for (j in (i+1):ncol(frac)){
      vec1<-append((sum(complete.cases(frac[,i], frac[,j]))),vec1)
      print(sum(complete.cases(frac[,i], frac[,j])))
    }
  }
  return(list(max.complete=max(vec1),
              min.complete=min(vec1),
              min.obs=min(vec2),
              max.obs=max(vec2)))
}#extract number of 1) complete observations across candidates and 2) number of non-NA values for each candidate

setwd("")#set working directory
dta <- read.csv("")

#2022
colnames(dta)[grepl("ussen22...REP", colnames(dta))]
dta$GOPPRIMARYTOT22 <- dta$ussen22...REPLillieBoddie+
  dta$ussen22...REPKatieBritt+dta$ussen22...REPMoBrooks+
  dta$ussen22...REPKarlaM.Dupriest+dta$ussen22...REPMikeDurant+
  dta$ussen22...REPJakeSchafer

dta$BROOKSPCT22 <- dta$ussen22...REPMoBrooks/dta$GOPPRIMARYTOT22 
dta$DURANTPCT22 <- dta$ussen22...REPMikeDurant/dta$GOPPRIMARYTOT22
dta$BRITTPCT22 <- dta$ussen22...REPKatieBritt/dta$GOPPRIMARYTOT22 

#2020
colnames(dta)[grepl("ussen20", colnames(dta))]
dta$GOPPRIMARYTOT20 <- dta$ussen20...Tommy.Tuberville+dta$ussen20...Jeff.Sessions+
  dta$ussen20...Roy.Moore+dta$ussen20...Arnold.Mooney+dta$ussen20...Bradley.Byrne+
  dta$ussen20...Ruth.Page.Nelson+dta$ussen20...Stanley.Adair

dta$SESSIONSPCT20 <- dta$ussen20...Jeff.Sessions/dta$GOPPRIMARYTOT20
dta$TUBERVILLEPCT20 <- dta$ussen20...Tommy.Tuberville/dta$GOPPRIMARYTOT20
dta$BYRNEPCT20 <- dta$ussen20...Bradley.Byrne/dta$GOPPRIMARYTOT20

#2017
colnames(dta)[grepl("ussen17...", colnames(dta))]#no under/over votes, all republican candidates
dta$GOPPRIMARYTOT17  <- rowSums(dta[, grepl("ussen17...", names(dta))], na.rm=T)
dta$MOOREPCT17 <- dta$ussen17...Roy.S..Moore/dta$GOPPRIMARYTOT17
dta$STRANGEPCT17 <- dta$ussen17...Luther.Strange/dta$GOPPRIMARYTOT17
dta$BROOKSPCT17 <- dta$ussen17...Mo.Brooks/dta$GOPPRIMARYTOT17

#2016 (pres)
colnames(dta)[grepl("pres16...", colnames(dta))]#only rep candidates
dta$GOPPRIMARYTOT16 <- dta$pres16...Ben.Carson+dta$pres16...Carly.Fiorina+
  dta$pres16...Chris.Christie+dta$pres16...Donald.J..Trump+dta$pres16...Jeb.Bush+
  dta$pres16...John.R..Kasich+dta$pres16...Lindsey.Graham+dta$pres16...Marco.Rubio+
  dta$pres16...Mike.Huckabee+dta$pres16...Rand.Paul+dta$pres16...Rick.Santorum+dta$pres16...Ted.Cruz+
  dta$pres16...Uncommitted#included uncommitted votes

dta$CRUZPCT16 <- dta$pres16...Ted.Cruz/dta$GOPPRIMARYTOT16
dta$RUBIOPCT16 <- dta$pres16...Marco.Rubio/dta$GOPPRIMARYTOT16
dta$CARSONPCT16 <- dta$pres16...Ben.Carson/dta$GOPPRIMARYTOT16
dta$TRUMPPCT16 <- dta$pres16...Donald.J..Trump/dta$GOPPRIMARYTOT16

#2016 (senate)
colnames(dta)[grepl("ussen16...", colnames(dta))]#only rep candidates
dta$GOPSENPRIMARYTOT16 <- dta$ussen16...John.Martin+dta$ussen16...Jonathan.McConnell+
  dta$ussen16...Marcus.Bowman+dta$ussen16...Richard.C..Shelby+dta$ussen16...Shadrack.McGill
dta$SHELBY.SENPCT16 <- dta$ussen16...Richard.C..Shelby/dta$GOPSENPRIMARYTOT16
dta$MCCONNEL.SENPCT16 <- dta$ussen16...Jonathan.McConnell/dta$GOPSENPRIMARYTOT16

#generate summary table
dta.sub <- dta%>%dplyr::select("SHELBY.SENPCT16","MCCONNEL.SENPCT16", 
                               "TRUMPPCT16","CRUZPCT16","RUBIOPCT16","CARSONPCT16",
                               "MOOREPCT17", "STRANGEPCT17", "BROOKSPCT17",
                               "SESSIONSPCT20","TUBERVILLEPCT20", "BYRNEPCT20", 
                               "BROOKSPCT22","BRITTPCT22","DURANTPCT22")
precinct.num(dta.sub)

cormatd <- cor(dta.sub,use="pairwise.complete.obs")
cormatd2 <- cormatd[,-c(1,2)]

print(xtable(cormatd2, 
             caption=c("Correlations of vote shares in Republican primaries from all Alabama precincts (n=1768-2064)."),
             label="t:al:gop", 
             digits=c(0,rep(3,13))),
      scalebox=0.55, 
      file=c("tableA23.tex"))#table A23

tableA23 <- readLines("tableA23.tex")
tableA23 <- gsub("SHELBY.SENPCT16", "Shelby '16", tableA23)
tableA23 <- gsub("MCCONNEL.SENPCT16", "McConnel '16", tableA23)
tableA23 <- gsub("TRUMPPCT16", "Trump '16", tableA23)
tableA23 <- gsub("CRUZPCT16", "Cruz '16", tableA23)
tableA23 <- gsub("RUBIOPCT16", "Rubio '16", tableA23)
tableA23 <- gsub("CARSONPCT16", "Carson '16", tableA23)
tableA23 <- gsub("MOOREPCT17", "Moore '17", tableA23)
tableA23 <- gsub("STRANGEPCT17", "Strange '17", tableA23)
tableA23 <- gsub("BROOKSPCT17", "Brooks '17", tableA23)
tableA23 <- gsub("SESSIONSPCT20", "Sessions '20", tableA23)
tableA23 <- gsub("TUBERVILLEPCT20", "Tuberville '20", tableA23)
tableA23 <- gsub("BYRNEPCT20", "Bryne '20", tableA23)
tableA23 <- gsub("BROOKSPCT22", "Brooks '22", tableA23)
tableA23 <- gsub("BRITTPCT22", "Britt '22", tableA23)
tableA23 <- gsub("DURANTPCT22", "Durant '22", tableA23)
writeLines(tableA23, "tableA23.tex")

cormatd3 <- data.frame(cbind(variable_2 = rownames(cormatd2), cormatd2))
cormatd4 <- melt(cormatd3, id = c("variable_2"))
cormatd4$year_cand1_1 <- ifelse(grepl("PCT16", cormatd4$variable_2), 2016, 0)
cormatd4$year_cand1_2 <- ifelse(grepl("PCT17", cormatd4$variable_2), 2017, 0)
cormatd4$year_cand1_3 <- ifelse(grepl("PCT20", cormatd4$variable_2), 2020, 0)
cormatd4$year_cand1_4 <- ifelse(grepl("PCT22", cormatd4$variable_2), 2022, 0)
cormatd4$year_cand1 <- cormatd4$year_cand1_1 + cormatd4$year_cand1_2 + cormatd4$year_cand1_3 + cormatd4$year_cand1_4

cormatd4$year_cand2_1 <- ifelse(grepl("PCT16", cormatd4$variable), 2016, 0)
cormatd4$year_cand2_2 <- ifelse(grepl("PCT17", cormatd4$variable), 2017, 0)
cormatd4$year_cand2_3 <- ifelse(grepl("PCT20", cormatd4$variable), 2020, 0)
cormatd4$year_cand2_4 <- ifelse(grepl("PCT22", cormatd4$variable), 2022, 0)
cormatd4$year_cand2 <- cormatd4$year_cand2_1 + cormatd4$year_cand2_2 + cormatd4$year_cand2_3 + cormatd4$year_cand2_4

cormatd5 <-cormatd4 %>%
  subset(., select=c(variable_2, year_cand1, variable, year_cand2, value))
cormatd5$value <- as.numeric(cormatd5$value)
cormatd5$variable <- as.character(cormatd5$variable)
cormatd5$value <- cormatd5$value
cormatd5$party <- "GOP"
cormatd5$state <- "AL"
write.csv(cormatd5, "al_groupbyyear_corrs.csv")
cormatd5$variable_2
gop.grouped<-cormatd5%>%
  mutate(year_cand1=ifelse(variable_2 %in% c("MCCONNEL.SENPCT16","SHELBY.SENPCT16"), "2016*", as.character(year_cand1)))%>%
  mutate(year_cand2=ifelse(variable %in% c("MCCONNEL.SENPCT16","SHELBY.SENPCT16"), "2016*", as.character(year_cand2)))%>%
  filter(year_cand1!=year_cand2)%>%
  filter(variable_2!=variable)%>%
  group_by(party,year_cand1,year_cand2)%>%
  dplyr::summarise(cor_mean=mean(value,na.rm=T), 
                   cor_median=median(value,na.rm=T))

#prepare corr summary table

corr.summary<-gop.grouped%>%
  setNames(c("Party","Year 1", "Year 2", "Corr. Mean", "Corr. Median"))%>%
  mutate(Party=Party%>%dplyr::recode(
    "GOP"="Republicans"
  ))

#2016-GOP-PRES
al.gop.pres.16 <- dta %>%
  dplyr::select(pres16...Ben.Carson, pres16...Carly.Fiorina, pres16...Chris.Christie, pres16...Donald.J..Trump, 
                pres16...Jeb.Bush, pres16...John.R..Kasich, pres16...Lindsey.Graham, pres16...Marco.Rubio, pres16...Mike.Huckabee, 
                pres16...Rand.Paul, pres16...Rick.Santorum, pres16...Ted.Cruz, pres16...Uncommitted)
al.gop.pres.16.voteshare <- colSums(al.gop.pres.16, na.rm=T) / sum(al.gop.pres.16, na.rm=T) 
al.gop.pres.16.voteshare <- al.gop.pres.16.voteshare %>% as.data.frame(.)
al.gop.pres.16.voteshare$cand <- row.names(al.gop.pres.16.voteshare)
colnames(al.gop.pres.16.voteshare)[1] <- "voteshare"
row.names(al.gop.pres.16.voteshare) <- NULL
al.gop.pres.16.voteshare$year <- 2016

#2016-GOP-SEN
al.gop.sen.16 <- dta %>%
  dplyr::select(ussen16...John.Martin, ussen16...Jonathan.McConnell,ussen16...Marcus.Bowman,ussen16...Richard.C..Shelby,ussen16...Shadrack.McGill)
al.gop.sen.16.voteshare <- colSums(al.gop.sen.16, na.rm=T) / sum(al.gop.sen.16, na.rm=T) 
al.gop.sen.16.voteshare <- al.gop.sen.16.voteshare %>% as.data.frame(.)
al.gop.sen.16.voteshare$cand <- row.names(al.gop.sen.16.voteshare)
colnames(al.gop.sen.16.voteshare)[1] <- "voteshare"
row.names(al.gop.sen.16.voteshare) <- NULL
al.gop.sen.16.voteshare$year <- 2016

#2017-GOP-SEN
al.gop.sen.17 <- dta %>%
  dplyr::select(contains('ussen17..'))
al.gop.sen.17.voteshare <- colSums(al.gop.sen.17, na.rm=T) / sum(al.gop.sen.17, na.rm=T) 
al.gop.sen.17.voteshare <- al.gop.sen.17.voteshare %>% as.data.frame(.)
al.gop.sen.17.voteshare$cand <- row.names(al.gop.sen.17.voteshare)
colnames(al.gop.sen.17.voteshare)[1] <- "voteshare"
row.names(al.gop.sen.17.voteshare) <- NULL
al.gop.sen.17.voteshare$year <- 2017

#2020-GOP-SEN
al.gop.sen.20 <- dta %>%
  dplyr::select(ussen20...Tommy.Tuberville, ussen20...Jeff.Sessions, ussen20...Roy.Moore, ussen20...Arnold.Mooney, 
                ussen20...Bradley.Byrne, ussen20...Ruth.Page.Nelson, ussen20...Stanley.Adair)
al.gop.sen.20.voteshare <- colSums(al.gop.sen.20, na.rm=T) / sum(al.gop.sen.20, na.rm=T) 
al.gop.sen.20.voteshare <- al.gop.sen.20.voteshare %>% as.data.frame(.)
al.gop.sen.20.voteshare$cand <- row.names(al.gop.sen.20.voteshare)
colnames(al.gop.sen.20.voteshare)[1] <- "voteshare"
row.names(al.gop.sen.20.voteshare) <- NULL
al.gop.sen.20.voteshare$year <- 2020

#2022-GOP-SEN
dta$ussen22...REPJakeSchafer
freq(dta$ussen22...REPJakeSchafer)
al.gop.sen.22 <- dta %>%
  dplyr::select(ussen22...REPLillieBoddie, ussen22...REPKatieBritt, ussen22...REPMoBrooks, 
                ussen22...REPKarlaM.Dupriest, ussen22...REPMikeDurant, ussen22...REPJakeSchafer)
al.gop.sen.22.voteshare <- colSums(al.gop.sen.22, na.rm=T) / sum(al.gop.sen.22, na.rm=T) 
al.gop.sen.22.voteshare <- al.gop.sen.22.voteshare %>% as.data.frame(.)
al.gop.sen.22.voteshare$cand <- row.names(al.gop.sen.22.voteshare)
colnames(al.gop.sen.22.voteshare)[1] <- "voteshare"
row.names(al.gop.sen.22.voteshare) <- NULL
al.gop.sen.22.voteshare$year <- 2022
  
al.voteshares <- rbind(al.gop.sen.22.voteshare, al.gop.sen.20.voteshare, al.gop.sen.17.voteshare, al.gop.pres.16.voteshare, al.gop.sen.16.voteshare)
write.csv(al.voteshares, "al-voteshares.csv")